package com.g2room.io.grep;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.stream.IntStream;

/**
 * Scrapes novels from the txt80 website.
 *
 * <p>Walks the paginated "all" listing, opens every novel's detail page, follows its
 * download links and hands the resolved file URL (plus the cover image) to
 * {@code downloadNet}. {@code getDoc} and {@code downloadNet} are not defined in this
 * file; they are presumably inherited from {@code GrepCommon}.
 */
public class GrepTxt80 extends GrepCommon {
    public static String domain = "https://www.txt80.com";

    /** Listing URL template; {@code %s} is empty for the first page and {@code _N} for page N. */
    private static final String LIST_URL_TEMPLATE = "https://www.txt80.com/all/index%s.html";

    /** Number of listing pages to walk (page indices 0 to 1329). */
    private static final int PAGE_COUNT = 1330;

    /** Pause after each successfully processed listing page, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 500L;

    /**
     * Entry point: crawls every listing page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        pages();
    }

    /**
     * Visits the listing pages in order, stopping early once the current thread has
     * been interrupted so a long crawl can be cancelled.
     */
    public static void pages() {
        for (int i = 0; i < PAGE_COUNT; i++) {
            if (Thread.currentThread().isInterrupted()) {
                return;
            }
            // The first page has no numeric suffix; later pages are "index_N.html".
            parsePage(String.format(LIST_URL_TEMPLATE, i == 0 ? "" : "_" + i));
        }
    }

    /**
     * Processes one listing page: every novel link found on it is passed to
     * {@link #parseDetail(String)}, then the method pauses briefly.
     *
     * <p>Failures are reported on stderr and never propagate; a failing novel does not
     * stop the remaining novels on the page.
     *
     * @param url URL of the listing page
     */
    public static void parsePage(String url) {
        try {
            Document document = getDoc(url);
            Elements elements = document.select(".list .list_l_box .slist .pic a");
            for (Element e : elements) {
                String detailUrl = e.attr("href");
                try {
                    parseDetail(detailUrl);
                } catch (Exception e1) {
                    System.err.println("Failed to process detail page: " + detailUrl);
                    e1.printStackTrace();
                }
            }
            Thread.sleep(PAGE_DELAY_MILLIS);
        } catch (InterruptedException ie) {
            // Restore the flag so pages() can see the cancellation request.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            System.err.println("Failed to process listing page: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Processes one novel detail page: resolves each download link to the final file
     * URL and downloads it, and downloads the cover image once.
     *
     * <p>The cover is fetched after the first successful file download rather than
     * once per link; if the cover download itself fails it is retried after the next
     * successful file download.
     *
     * @param url URL of the detail page
     */
    public static void parseDetail(String url) {
        Document document = getDoc(url);
        Elements imgEl = document.select(".pics3");
        Elements elements = document.select(".downlinks a");
        boolean coverHandled = false;
        for (Element e : elements) {
            String linkUrl = e.attr("href");
            try {
                String fileUrl = downloadPage(linkUrl);
                if (fileUrl == null) {
                    System.err.println("No download link found on: " + linkUrl);
                    continue;
                }
                downloadNet(fileUrl, null);
                if (!coverHandled && !imgEl.isEmpty()) {
                    Element img = imgEl.get(0);
                    String src = img.attr("src");
                    if (!src.isEmpty()) {
                        // The alt text carries a "图片" marker that is stripped from the file name.
                        String name = img.attr("alt").replace("图片", "") + extensionOf(src);
                        downloadNet(src, name);
                    }
                    coverHandled = true;
                }
            } catch (Exception e1) {
                System.err.println("Failed to download from: " + linkUrl);
                e1.printStackTrace();
            }
        }
    }

    /**
     * Opens an intermediate download page and extracts the target of its first
     * {@code .downlist a} link.
     *
     * @param url URL of the intermediate download page
     * @return the {@code href} of the first matching link, or {@code null} when the
     *         page contains none
     */
    public static String downloadPage(String url) {
        Document document = getDoc(url);
        Element first = document.select(".downlist a").first();
        return first == null ? null : first.attr("href");
    }

    /**
     * Returns the file extension of a URL including the leading dot, or an empty
     * string when the last path segment contains no dot.
     */
    private static String extensionOf(String src) {
        int dot = src.lastIndexOf('.');
        // A dot before the last '/' belongs to the host or a directory, not the file name.
        return dot > src.lastIndexOf('/') ? src.substring(dot) : "";
    }
}
